import scrapy
import pymysql.cursors
import re
from urllib.parse import urljoin
class DmozSpider(scrapy.Spider):
    """Crawl movie listing pages on pvpba.cn and store unseen movies.

    For each listing item the spider extracts the detail-page URL, title
    and cover image, derives the numeric movie id from the URL, and
    inserts a row into the MySQL table ``bs_movie`` unless a row with
    that ``mid`` already exists.  It then follows the "next page" link
    until the pager's second-to-last anchor no longer reads '下一页'.
    """
    name = "movie"
    allowed_domains = ["pvpba.cn"]
    start_urls = [
        "http://m.pvpba.cn/frim/index2-pg-1.html"
    ]
    # NOTE(review): credentials are hardcoded and the connection is opened
    # at class-definition time and never closed — consider moving this to
    # Scrapy settings and the spider_opened/spider_closed signals.
    conn = pymysql.connect(host='127.0.0.1', user='root', password='198482115',
                           db='movie', charset='utf8',
                           cursorclass=pymysql.cursors.DictCursor)

    # Compiled once: captures (path-prefix, numeric movie id) from URLs
    # shaped like ".../indexNNN...".
    _MID_RE = re.compile(r"(.+/index)([0-9]+)")

    def parse(self, response):
        """Parse one listing page.

        :param response: Scrapy response for a listing page.
        :yields: a ``scrapy.Request`` for the next listing page while the
            pager link text is '下一页'.
        """
        items = response.xpath('//*[@id="data_list"]/li/div')
        # The pager's second-to-last <a> is the "next page" link.
        pager = response.xpath("//div[contains(@class,'page')]/a[last()-1]")
        next_url = urljoin(response.url, pager.xpath('@href').extract_first())
        next_text = pager.xpath('text()').extract_first()

        for item in items:
            movie_url = urljoin(response.url, item.xpath('a/@href').extract_first())
            title = item.xpath("a/span[@class='sTit']/text()").extract_first()
            image = item.xpath('a/img/@src').extract_first()

            match = self._MID_RE.match(movie_url)
            if match is None:
                # URL doesn't follow the expected .../indexNNN pattern;
                # skip instead of crashing on match.groups().
                continue
            movie_id = match.group(2)

            # Parameterized query — movie_id comes from scraped content and
            # must never be concatenated into the SQL string.
            with self.conn.cursor() as cursor:
                cursor.execute("SELECT * FROM `bs_movie` where `mid` = %s",
                               (movie_id,))
                existing = cursor.fetchone()
            self.conn.commit()

            if existing is None:
                sql = "INSERT INTO `bs_movie` (`title`, `image`,`link`,`mid`) VALUES (%s,%s,%s,%s)"
                with self.conn.cursor() as cursor:
                    cursor.execute(sql, (title, image, movie_url, movie_id))
                self.conn.commit()

        if next_text == '下一页':
            yield scrapy.Request(next_url, callback=self.parse)
